From f481a3f9c3d69d1a7a059538547550142ac66792 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Sat, 14 Nov 2009 08:09:50 +0000 Subject: [PATCH] xend: Balloon down memory to achieve enough DMA32 memory for PV guests with PCI pass-through to successfully launch. If the user hasn't used the dom0_mem= bootup parameter, the privileged domain usurps all of the memory. During launch of PV guests with PCI pass-through we ratchet down the memory for the privileged domain to the required memory for the PV guest. However, for PV guests with PCI pass-through we do not take into account that the PV guest is going to swap its SWIOTLB memory for DMA32 memory - in fact, swap 64MB of it. This patch balloons down the privileged domain so that there are 64MB of DMA32 memory available. From: Konrad Rzeszutek Wilk Signed-off-by: Keir Fraser --- tools/python/xen/lowlevel/xc/xc.c | 18 ++++++++ tools/python/xen/xend/XendConfig.py | 7 ++++ tools/python/xen/xend/XendDomainInfo.py | 56 ++++++++++++++++++++++++- tools/python/xen/xend/XendNode.py | 12 ++++-- 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c index 12ea007a53..4c90579c68 100644 --- a/tools/python/xen/lowlevel/xc/xc.c +++ b/tools/python/xen/lowlevel/xc/xc.c @@ -1059,6 +1059,7 @@ static PyObject *pyxc_physinfo(XcObject *self) int i, j, max_cpu_id; uint64_t free_heap; PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj; + PyObject *node_to_dma32_mem_obj; xc_cpu_to_node_t map[MAX_CPU_ID + 1]; const char *virtcap_names[] = { "hvm", "hvm_directio" }; @@ -1128,10 +1129,27 @@ static PyObject *pyxc_physinfo(XcObject *self) Py_DECREF(pyint); } + xc_dom_loginit(); + /* DMA memory. 
*/ + node_to_dma32_mem_obj = PyList_New(0); + + for ( i = 0; i < info.nr_nodes; i++ ) + { + PyObject *pyint; + + xc_availheap(self->xc_handle, 0, 32, i, &free_heap); + xc_dom_printf("Node:%d: DMA32:%ld\n", i, free_heap); + pyint = PyInt_FromLong(free_heap / 1024); + PyList_Append(node_to_dma32_mem_obj, pyint); + Py_DECREF(pyint); + } + PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj); Py_DECREF(node_to_cpu_obj); PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj); Py_DECREF(node_to_memory_obj); + PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj); + Py_DECREF(node_to_dma32_mem_obj); return ret_obj; #undef MAX_CPU_ID diff --git a/tools/python/xen/xend/XendConfig.py b/tools/python/xen/xend/XendConfig.py index 6a168a264d..0eadf343d3 100644 --- a/tools/python/xen/xend/XendConfig.py +++ b/tools/python/xen/xend/XendConfig.py @@ -2111,6 +2111,13 @@ class XendConfig(dict): def is_hap(self): return self['platform'].get('hap', 0) + def is_pv_and_has_pci(self): + for dev_type, dev_info in self.all_devices_sxpr(): + if dev_type != 'pci': + continue + return not self.is_hvm() + return False + def update_platform_pci(self): pci = [] for dev_type, dev_info in self.all_devices_sxpr(): diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index 212d1d3927..f6becb6bcc 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -2580,7 +2580,8 @@ class XendDomainInfo: def _setCPUAffinity(self): - """ Repin domain vcpus if a restricted cpus list is provided + """ Repin domain vcpus if a restricted cpus list is provided. + Returns the choosen node number. 
""" def has_cpus(): @@ -2597,6 +2598,7 @@ class XendDomainInfo: return True return False + index = 0 if has_cpumap(): for v in range(0, self.info['VCPUs_max']): if self.info['vcpus_params'].has_key('cpumap%i' % v): @@ -2647,6 +2649,54 @@ class XendDomainInfo: cpumask = info['node_to_cpu'][index] for v in range(0, self.info['VCPUs_max']): xc.vcpu_setaffinity(self.domid, v, cpumask) + return index + + def _freeDMAmemory(self, node): + + # If we are PV and have PCI devices the guest will + # turn on a SWIOTLB. The SWIOTLB _MUST_ be located in the DMA32 + # zone (under 4GB). To do so, we need to balloon down Dom0 to where + # there is enough (64MB) memory under the 4GB mark. This balloon-ing + # might take more memory out than just 64MB thought :-( + if not self.info.is_pv_and_has_pci(): + return + + retries = 2000 + ask_for_mem = 0; + need_mem = 0 + try: + while (retries > 0): + physinfo = xc.physinfo() + free_mem = physinfo['free_memory'] + nr_nodes = physinfo['nr_nodes'] + node_to_dma32_mem = physinfo['node_to_dma32_mem'] + if (node > nr_nodes): + return; + # Extra 2MB above 64GB seems to do the trick. + need_mem = 64 * 1024 + 2048 - node_to_dma32_mem[node] + # our starting point. We ask just for the difference to + # be have an extra 64MB under 4GB. + ask_for_mem = max(need_mem, ask_for_mem); + if (need_mem > 0): + log.debug('_freeDMAmemory (%d) Need %dKiB DMA memory. ' + 'Asking for %dKiB', retries, need_mem, + ask_for_mem) + + balloon.free(ask_for_mem, self) + ask_for_mem = ask_for_mem + 2048; + else: + # OK. We got enough DMA memory. + break + retries = retries - 1 + except: + # This is best-try after all. + need_mem = max(1, need_mem); + pass + + if (need_mem > 0): + log.warn('We tried our best to balloon down DMA memory to ' + 'accomodate your PV guest. 
We need %dKiB extra memory.', + need_mem) def _setSchedParams(self): if XendNode.instance().xenschedinfo() == 'credit': @@ -2668,7 +2718,7 @@ class XendDomainInfo: # repin domain vcpus if a restricted cpus list is provided # this is done prior to memory allocation to aide in memory # distribution for NUMA systems. - self._setCPUAffinity() + node = self._setCPUAffinity() # Set scheduling parameters. self._setSchedParams() @@ -2730,6 +2780,8 @@ class XendDomainInfo: if self.info.target(): self._setTarget(self.info.target()) + self._freeDMAmemory(node) + self._createDevices() self.image.cleanupTmpImages() diff --git a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py index 0fbefef6f8..bb1dad4eab 100644 --- a/tools/python/xen/xend/XendNode.py +++ b/tools/python/xen/xend/XendNode.py @@ -872,11 +872,11 @@ class XendNode: except: str='none\n' return str[:-1]; - def format_node_to_memory(self, pinfo): + def format_node_to_memory(self, pinfo, key): str='' whitespace='' try: - node_to_memory=pinfo['node_to_memory'] + node_to_memory=pinfo[key] for i in range(0, pinfo['nr_nodes']): str+='%snode%d:%d\n' % (whitespace, i, @@ -896,7 +896,10 @@ class XendNode: info['total_memory'] = info['total_memory'] / 1024 info['free_memory'] = info['free_memory'] / 1024 info['node_to_cpu'] = self.format_node_to_cpu(info) - info['node_to_memory'] = self.format_node_to_memory(info) + info['node_to_memory'] = self.format_node_to_memory(info, + 'node_to_memory') + info['node_to_dma32_mem'] = self.format_node_to_memory(info, + 'node_to_dma32_mem') ITEM_ORDER = ['nr_cpus', 'nr_nodes', @@ -908,7 +911,8 @@ class XendNode: 'total_memory', 'free_memory', 'node_to_cpu', - 'node_to_memory' + 'node_to_memory', + 'node_to_dma32_mem' ] return [[k, info[k]] for k in ITEM_ORDER] -- 2.30.2